* Reset the session before doing anything else: close a log that may still be
* open (errors from having none open are suppressed), empty memory, and turn
* off output paging.
capture log close
clear all
set more off

********************************************************************************
***** Project: The Short and Long Term Effects of In-Person Performance Feedback
********************************************************************************
***** A. R. Soetevent & G. J. Romensen
********************************************************************************
***** Removes or scrambles all driver-related information that is not used in
***** the analysis in "DataMainAnalysis.dta", "FullDataRelPerformanceAndFeedback.dta",
***** "coachlogs.dta" and "FeedBackReceived.dta". The resulting data sets, with
***** "DEPO" added to the file name, can be made available upon request
***** (see README.pdf for details).
********************************************************************************




* Every path below is built from the global macro filepath. This script does
* not set it (the definitions below are commented out), so it has to be
* defined before the script is run.
* global filepath "C:\JPEMicReplication"
* global paperpath "$filepath\TablesGraphs"
log using "$filepath/Logs/11DataToDepository.log", replace

/*** Notes ***/
* No fuel economy observations postfeedback in urban area.

* Build the scrambling key used by all merges further down: read the raw text
* file, discard the three toevalsgetal columns, and store what is left as a
* Stata dataset. Memory is emptied again afterwards.
import delimited using "$filepath\Data\Using databases\Scrambling drivers and dates for data depository - ALL.txt"
drop toevalsgetal3 toevalsgetal2 toevalsgetal1
save "$filepath\ConstructedData\DataToDepositoryKeyALL.dta", replace
clear all


** I. DataMainAnalysis.dta
use "$filepath\ConstructedData\DataMainAnalysisALL.dta"

** Offset the ZH shift numbers (+10000) and line numbers (+1000) to
** distinguish them from FR, then tabulate line by region as a visual check.
replace dienst_nr = dienst_nr + 10000 if regio == "ZH"
replace lijn_nr = lijn_nr + 1000 if regio == "ZH"
tabulate lijn_nr regio
* Check!


*** A. Drop variables ***
* The drop of the trip timing variables below is disabled. The variables
* werkelijke_begintijd_rit and werkelijke_eindtijd_rit are read again in
* Section V to compute the start and end of each shift.
*drop geplande_eindtijd_rit werkelijke_begintijd_rit werkelijke_eindtijd_rit
drop heen_van heen_naar ///
     ves_naam ves_naam_chauf standplaats startdag einddag midtijd ///
     rit_variant wagenpark_nr_da ///
     rit_nr rijden_tijd rijden_afstand stationair_tijd stationair_verbruik ///
     stationair_kort_verbruik verbruik_calc wagenpark_nr_dp rijden_afstand_dp ///
     acceleratie_score bochten_score rem_score ///
     rijden_verbruik ///
     m_rituitval_ind_da m_rituitval_ind_dp afstand_dp_acceleratie ///
     afstand_dp_remmen afstand_dp_bochten afstand_da verbruik_calc_ok ///
     wagenpark_nr

** + Additional variables ZH data
drop driverid coachidentifier gender birthyear employmentyear


*** B. Swap the (already anonymous) company identifiers for randomly generated
*** ones: driver (chauf_nr_rug), line (lijn_nr) and shift (dienst_nr), and
*** attach the random location id (rndlocid) that is keyed on the driver.
* Each lookup keeps all rows of the data in memory plus the matching key
* rows. Key rows without a counterpart in the data are discarded.
foreach key in chauf_nr_rug lijn_nr dienst_nr {
    merge m:1 `key' using "$filepath\ConstructedData\DataToDepositoryKeyALL.dta", keepusing(rnd`key') keep(master match) nogenerate
}
merge m:1 chauf_nr_rug using "$filepath\ConstructedData\DataToDepositoryKeyALL.dta", keepusing(rndlocid) keep(master match) nogenerate

* Remove the original identifiers and give their names to the random ones.
drop chauf_nr_rug lijn_nr dienst_nr
foreach key in chauf_nr_rug lijn_nr dienst_nr {
    rename rnd`key' `key'
}
order chauf_nr_rug treatment datum lijn_nr dienst_nr

* Recode the coach identifiers stored in eco_coach_nr_rug, one value at a
* time. The statements run in order, so each one only sees values left
* untouched or produced by the statements above it. The thirteen target
* values are the same numbers that Section V lists as the depository driver
* numbers of the eco-coaches.
* First group: the same six recodes are applied to coachlogs.dta in Section III.
replace eco_coach_nr_rug=939 if eco_coach_nr_rug==73
replace eco_coach_nr_rug=1404 if eco_coach_nr_rug==185
replace eco_coach_nr_rug=519 if eco_coach_nr_rug==215
replace eco_coach_nr_rug=1286 if eco_coach_nr_rug==270
replace eco_coach_nr_rug=1610 if eco_coach_nr_rug==411
replace eco_coach_nr_rug=531 if eco_coach_nr_rug==416

* Second group of recodes.
* NOTE(review): the value 244358 is tested twice below. The first test
* recodes every 244358 to 808, so the later test (target 1618) can never
* match and no coach is ever recoded to 1618 here. One of the two source
* values is presumably wrong - confirm against the scrambling key.
replace eco_coach_nr_rug=936 if eco_coach_nr_rug==1228
replace eco_coach_nr_rug=594 if eco_coach_nr_rug==1233
replace eco_coach_nr_rug=808 if eco_coach_nr_rug==244358
replace eco_coach_nr_rug=1297 if eco_coach_nr_rug==1280
replace eco_coach_nr_rug=1618 if eco_coach_nr_rug==244358
replace eco_coach_nr_rug=731 if eco_coach_nr_rug==1644
replace eco_coach_nr_rug=1402 if eco_coach_nr_rug==1802


* Shrink storage types where this loses no information, then write the file.
compress
** SAVE RESULTING DEPOSITORY FILE **
* This file is loaded again in Section V.
save "$filepath\DEPO\DataMainAnalysisDEPO.dta", replace



*** II.  "FullDataRelPerformanceAndFeedback.dta"
clear all
use "$filepath\Data\Using databases\Incidence check\FullDataRelPerformanceAndFeedback.dta"

* Look up the random driver number. All rows of the data in memory are kept,
* and key rows that match nothing are discarded.
merge m:1 chauf_nr_rug using "$filepath\ConstructedData\DataToDepositoryKeyALL.dta", keepusing(rndchauf_nr_rug) keep(master match) nogenerate

* Replace the original driver number by the random one and move it to the front.
drop chauf_nr_rug
rename rndchauf_nr_rug chauf_nr_rug
order chauf_nr_rug

** SAVE RESULTING FILE **
compress
save "$filepath\DEPO\FullDataRelPerformanceAndFeedbackDEPO.dta", replace


*** III.  "coachlogs.dta"
clear all
use "$filepath\Data\Using databases\coachlogs.dta"

* Look up the random driver number. All rows of the data in memory are kept,
* and key rows that match nothing are discarded.
merge m:1 chauf_nr_rug using "$filepath\ConstructedData\DataToDepositoryKeyALL.dta", keepusing(rndchauf_nr_rug) keep(master match) nogenerate
drop chauf_nr_rug
rename rndchauf_nr_rug chauf_nr_rug

* Recode the coach identifiers. The two lists are paired by position and are
* processed from left to right.
local old_ids 73 185 215 270 411 416
local new_ids 939 1404 519 1286 1610 531
forvalues i = 1/6 {
    local from : word `i' of `old_ids'
    local to : word `i' of `new_ids'
    replace eco_coach_nr_rug=`to' if eco_coach_nr_rug==`from'
}
order chauf_nr_rug

** SAVE RESULTING FILE **
compress
save "$filepath\DEPO\coachlogsDEPO.dta", replace

*** IV.  "FeedBackReceived.dta"
clear all
use "$filepath\ConstructedData\FeedBackReceived.dta"

* Attach the random driver number from the key. Rows that exist only in the
* key are not kept.
merge m:1 chauf_nr_rug using "$filepath\ConstructedData\DataToDepositoryKeyALL.dta", keepusing(rndchauf_nr_rug) keep(master match) nogenerate

* The random number takes over the name of the original identifier.
drop chauf_nr_rug
rename rndchauf_nr_rug chauf_nr_rug
order chauf_nr_rug

** SAVE RESULTING FILE **
compress
save "$filepath\DEPO\FeedBackReceivedDEPO.dta", replace


*** V.  "coachDrivingAndCoachingDatesDEPO.dta"
*********************************************************************************
*** Determines for each day (FR and ZH) whether a coach is coaching or driving
*********************************************************************************
clear all


/*** Notes ***/
* No fuel economy observations postfeedback in urban area.

use "$filepath\DEPO\DataMainAnalysisDEPO.dta"

* Depository (randomly generated) driver numbers of the eco-coaches. The list
* is defined once and reused by both loops below.
local eco_nr "939 1404 519 1286 1610 531 594 731 808 936 1297 1402 1618"

* Create dummy for drivers who are Eco-coaches [chauf_nr_rug: randomly generated depository numbers!]
gen byte IsCoach = 0
foreach x of local eco_nr {
    replace IsCoach = 1 if chauf_nr_rug==`x'
}
label variable IsCoach "Driver is also coach"

* Descriptive means and counts by coach status, per region (log output only).
tabstat dep_fueleconomyLpKM dep_acceleratie dep_bochten dep_rem punctuality gebjaar jaardienst fte geslacht ochtendspits avondspits weekendrit vakantierit uitleenrit fulltimer IsCoach vdl* intouro iris12 if regio=="FR", by(IsCoach) s(mean n)

tabstat dep_fueleconomyLpKM dep_acceleratie dep_bochten dep_rem  punctuality gebjaar jaardienst fte geslacht ochtendspits avondspits weekendrit vakantierit uitleenrit fulltimer IsCoach if regio=="ZH", by(IsCoach) s(mean n)

* Start and end of each driver-day shift: earliest actual trip start and
* latest actual trip end. The results are displayed as %tc clock values,
* which count milliseconds and only fit exactly in a double. Without an
* explicit type, egen and gen create float variables and the times are
* silently rounded, so every clock variable below is declared double.
bysort chauf_nr_rug datum: egen double startshift = min(werkelijke_begintijd_rit)
format startshift %tc
label variable startshift "start time shift"

bysort chauf_nr_rug datum: egen double endshift = max(werkelijke_eindtijd_rit)
format endshift %tc
label variable endshift "end time shift"

* Determine dates at which eco-coach DRIVES
keep datum  chauf_nr_rug eco_coach_nr_rug coachdatum_* IsCoach startshift endshift
sort datum

* For every coach, build row-level helper variables (suffix hulp) and spread
* their maximum over all rows of the same date:
*   Drives     = 1 if the coach appears as driver on that date
*   Coaches    = 1 if the coach is the assigned eco-coach of some driver and
*                the date equals one of that driver's five coachdatum values
*   startshift = shift start of the coach on that date (missing if not driving)
*   endshift   = shift end of the coach on that date (missing if not driving)
* NOTE(review): Section I of this script never recodes any coach id to 1618
* (its source value 244358 is consumed by the recode to 808), so coach1618Coaches
* is presumably 0 on every date - confirm.
foreach x of local eco_nr {
    gen byte coach`x'Driveshulp = (chauf_nr_rug==`x')
    gen byte coach`x'Coacheshulp = 0
    replace coach`x'Coacheshulp = 1 if eco_coach_nr_rug==`x' & (datum==coachdatum_1|datum==coachdatum_2|datum==coachdatum_3|datum==coachdatum_4|datum==coachdatum_5)
    gen double coach`x'startshifthulp = startshift if chauf_nr_rug==`x'
    gen double coach`x'endshifthulp = endshift if chauf_nr_rug==`x'

    by datum: egen coach`x'Drives = max(coach`x'Driveshulp)
    by datum: egen coach`x'Coaches = max(coach`x'Coacheshulp)
    by datum: egen double coach`x'startshift = max(coach`x'startshifthulp)
    format coach`x'startshift %tc
    by datum: egen double coach`x'endshift = max(coach`x'endshifthulp)
    format coach`x'endshift %tc

    label variable coach`x'Drives "This date, driver `x' = Driving"
    label variable coach`x'Coaches "This date, driver `x' = Coaching"
    label variable coach`x'startshift "Start shift Coach no. `x'"
    label variable coach`x'endshift "End shift Coach no. `x'"
}

* Collapse to one row per date. The per-coach variables are constant within a
* date, so nothing is lost for them.
* NOTE(review): IsCoach, startshift and endshift also stay in the saved file,
* but they come from whichever row happens to be first within the date and so
* carry no date-level meaning - confirm that downstream code ignores them.
duplicates drop datum, force
drop chauf_nr_rug coachdatum_* eco_coach_nr_rug coach*hulp
compress
/*** Save file with dates that coaches drive or coach ***/
save "$filepath\DEPO\coachDrivingAndCoachingDatesDEPO.dta", replace
* Note: used by X07-HetEffectsCoaching.do


log close
